{
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-03-05T01:12:19.276070Z",
     "start_time": "2025-03-05T01:12:18.452410Z"
    }
   },
   "source": [
    "import pandas as pd"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 2 Series"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 生成一个Series\n",
    "\n",
    "ser_obj = pd.Series(range(10, 20)) #默认索引是0-9\n",
    "print(ser_obj) #打印输出会带有类型\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:05:11.462956Z",
     "start_time": "2025-02-26T03:05:11.459872Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    10\n",
      "1    11\n",
      "2    12\n",
      "3    13\n",
      "4    14\n",
      "5    15\n",
      "6    16\n",
      "7    17\n",
      "8    18\n",
      "9    19\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T03:05:25.392517Z",
     "start_time": "2025-02-26T03:05:25.388917Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 获取数据\n",
    "print(ser_obj.values)  #values实际是ndarray\n",
    "print(type(ser_obj.values)) #类型是ndarray\n",
    "# 获取索引\n",
    "print(ser_obj.index)  #内部自带的类型--RangeIndex\n",
    "ser_obj.dtype #数据类型"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10 11 12 13 14 15 16 17 18 19]\n",
      "<class 'numpy.ndarray'>\n",
      "RangeIndex(start=0, stop=10, step=1)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "dtype('int64')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "print(ser_obj[0]) \n",
    "ser_obj[9] #\n",
    "# 访问不存在的索引下标会报keyerror"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:06:15.918591Z",
     "start_time": "2025-02-26T03:06:15.915203Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "19"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "source": [
    "print(ser_obj * 2)  #元素级乘法\n",
    "print(ser_obj > 15) #返回一个bool序列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:06:27.737639Z",
     "start_time": "2025-02-26T03:06:27.734497Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    20\n",
      "1    22\n",
      "2    24\n",
      "3    26\n",
      "4    28\n",
      "5    30\n",
      "6    32\n",
      "7    34\n",
      "8    36\n",
      "9    38\n",
      "dtype: int64\n",
      "0    False\n",
      "1    False\n",
      "2    False\n",
      "3    False\n",
      "4    False\n",
      "5    False\n",
      "6     True\n",
      "7     True\n",
      "8     True\n",
      "9     True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "source": [
    "#字典变为series，索引是字典的key，value是字典的value，感受非默认索引\n",
    "\n",
    "year_data = {2001: 17.8, 2005: 20.1, 2003: 16.5}\n",
    "ser_obj2 = pd.Series(year_data)\n",
    "print(ser_obj2)\n",
    "print(ser_obj2.index)\n",
    "print(ser_obj2[2001])\n",
    "ser_obj2.values"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:06:55.707326Z",
     "start_time": "2025-02-26T03:06:55.702251Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2001    17.8\n",
      "2005    20.1\n",
      "2003    16.5\n",
      "dtype: float64\n",
      "Index([2001, 2005, 2003], dtype='int64')\n",
      "17.8\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([17.8, 20.1, 16.5])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "source": [
    "#有点鸡肋\n",
    "print(ser_obj2.name) #Series名字\n",
    "print(ser_obj2.index.name)  #索引名字\n",
    "ser_obj2.name = 'temp'\n",
    "ser_obj2.index.name = 'year1'\n",
    "print('-'*50)\n",
    "print(ser_obj2.head())  #head默认显示前5行\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:07:15.842024Z",
     "start_time": "2025-02-26T03:07:15.838430Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n",
      "None\n",
      "--------------------------------------------------\n",
      "year1\n",
      "2001    17.8\n",
      "2005    20.1\n",
      "2003    16.5\n",
      "Name: temp, dtype: float64\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T03:08:06.522399Z",
     "start_time": "2025-02-26T03:08:06.519328Z"
    }
   },
   "cell_type": "code",
   "source": "type(ser_obj2)",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 3 DataFrame"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import numpy as np\n",
    "\n",
    "# 通过ndarray构建DataFrame\n",
    "t = pd.DataFrame(np.arange(12).reshape((3,4))) #默认索引是0-2\n",
    "print(t)\n",
    "print('-'*50)\n",
    "array = np.random.randn(5,4)\n",
    "print(array)\n",
    "print('-'*50)\n",
    "df_obj = pd.DataFrame(array)\n",
    "print(df_obj.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:08:52.477414Z",
     "start_time": "2025-02-26T03:08:52.471300Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   0  1   2   3\n",
      "0  0  1   2   3\n",
      "1  4  5   6   7\n",
      "2  8  9  10  11\n",
      "--------------------------------------------------\n",
      "[[-0.6502005  -1.10275446 -0.75567439  0.45340229]\n",
      " [ 0.91321648  0.85369477 -1.58253494 -0.79668111]\n",
      " [ 0.78491262  0.37778356 -0.43135558  2.11606344]\n",
      " [ 1.26701023  1.24419553  0.38524668  0.94450446]\n",
      " [-0.39175145 -0.0256497   0.71906224  0.59264065]]\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0 -0.650200 -1.102754 -0.755674  0.453402\n",
      "1  0.913216  0.853695 -1.582535 -0.796681\n",
      "2  0.784913  0.377784 -0.431356  2.116063\n",
      "3  1.267010  1.244196  0.385247  0.944504\n",
      "4 -0.391751 -0.025650  0.719062  0.592641\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T03:09:45.580374Z",
     "start_time": "2025-02-26T03:09:45.576276Z"
    }
   },
   "cell_type": "code",
   "source": "df_obj",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2         3\n",
       "0 -0.650200 -1.102754 -0.755674  0.453402\n",
       "1  0.913216  0.853695 -1.582535 -0.796681\n",
       "2  0.784913  0.377784 -0.431356  2.116063\n",
       "3  1.267010  1.244196  0.385247  0.944504\n",
       "4 -0.391751 -0.025650  0.719062  0.592641"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.650200</td>\n",
       "      <td>-1.102754</td>\n",
       "      <td>-0.755674</td>\n",
       "      <td>0.453402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.913216</td>\n",
       "      <td>0.853695</td>\n",
       "      <td>-1.582535</td>\n",
       "      <td>-0.796681</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.784913</td>\n",
       "      <td>0.377784</td>\n",
       "      <td>-0.431356</td>\n",
       "      <td>2.116063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.267010</td>\n",
       "      <td>1.244196</td>\n",
       "      <td>0.385247</td>\n",
       "      <td>0.944504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.391751</td>\n",
       "      <td>-0.025650</td>\n",
       "      <td>0.719062</td>\n",
       "      <td>0.592641</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "source": [
    "t.loc[0] #单独把某一行取出来,类型是series"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:09:27.164974Z",
     "start_time": "2025-02-26T03:09:27.161337Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0\n",
       "1    1\n",
       "2    2\n",
       "3    3\n",
       "Name: 0, dtype: int32"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "source": [
    "# 列表套字典  变df\n",
    "d2 =[{\"name\" : \"xiaohong\" ,\"age\" :32,\"tel\" :10010},\n",
    "     { \"name\": \"xiaogang\" ,\"tel\": 10000} ,\n",
    "     {\"name\":\"xiaowang\" ,\"age\":22}]\n",
    "df6=pd.DataFrame(d2)\n",
    "print(df6) #缺失值会用NaN填充\n",
    "print(type(df6.values)) #ndarray"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:10:29.265448Z",
     "start_time": "2025-02-26T03:10:29.261405Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       name   age      tel\n",
      "0  xiaohong  32.0  10010.0\n",
      "1  xiaogang   NaN  10000.0\n",
      "2  xiaowang  22.0      NaN\n",
      "<class 'numpy.ndarray'>\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "cell_type": "code",
   "source": [
    "pd.Series(1, index=list(range(3,7)),dtype='float32')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:10:51.077162Z",
     "start_time": "2025-02-26T03:10:51.072670Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3    1.0\n",
       "4    1.0\n",
       "5    1.0\n",
       "6    1.0\n",
       "dtype: float32"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "code",
   "source": [
    "#df中不同列可以是不同的数据类型,同一列必须是一个数据类型\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "dict_data = {'A': 1,\n",
    "             'B': pd.Timestamp('20190926'),\n",
    "             'C': pd.Series(1, index=list(range(4)),dtype='float32'),\n",
    "             'D': np.array([1,2,3,4],dtype='int32'),\n",
    "             'E': [\"Python\",\"Java\",\"C++\",\"C\"],\n",
    "             'F': 'wangdao' }\n",
    "df_obj2 = pd.DataFrame(dict_data)\n",
    "print(df_obj2)\n",
    "print('-'*50)\n",
    "print(df_obj2.index) #行索引,重点\n",
    "#补课改变\n",
    "# df_obj2.index[0]=2  不可以单独修改某个索引值\n",
    "print(df_obj2.columns) #列索引，重点\n",
    "df_obj2.dtypes #每一列的数据类型，重点,不同列可以是不同的数据类型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:11:20.279184Z",
     "start_time": "2025-02-26T03:11:20.272539Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao\n",
      "3  1 2019-09-26  1.0  4       C  wangdao\n",
      "--------------------------------------------------\n",
      "Index([0, 1, 2, 3], dtype='int64')\n",
      "Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "A            int64\n",
       "B    datetime64[s]\n",
       "C          float32\n",
       "D            int32\n",
       "E           object\n",
       "F           object\n",
       "dtype: object"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "source": [
    "# 感受日期,初始化df，设置行索引，列索引\n",
    "dates = pd.date_range('20230101', periods=6) #默认freq='D'，即天\n",
    "df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))\n",
    "print(df)\n",
    "print('-'*50)\n",
    "print(df.index)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:12:59.519264Z",
     "start_time": "2025-02-26T03:12:59.514577Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                   A         B         C         D\n",
      "2023-01-01  0.168961  0.258563 -0.865196 -1.315408\n",
      "2023-01-02 -1.600190  0.301173 -2.285979 -0.367068\n",
      "2023-01-03 -1.322561 -1.175522 -1.480280  0.053998\n",
      "2023-01-04 -0.329548  0.412165  0.899430 -0.405900\n",
      "2023-01-05 -0.802247 -0.536371 -0.448451  0.667320\n",
      "2023-01-06  2.939782  0.720573 -0.173812 -0.114041\n",
      "--------------------------------------------------\n",
      "DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',\n",
      "               '2023-01-05', '2023-01-06'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "source": [
    "#取数据\n",
    "print(df_obj2)\n",
    "print(type(df_obj2))\n",
    "#pd中使用索引名来取某一行，或者列\n",
    "print(df_obj2['B'])\n",
    "#把df的某一列取出来是series\n",
    "print(type(df_obj2['B']))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:14:03.343157Z",
     "start_time": "2025-02-26T03:14:03.339434Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao\n",
      "3  1 2019-09-26  1.0  4       C  wangdao\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "0   2019-09-26\n",
      "1   2019-09-26\n",
      "2   2019-09-26\n",
      "3   2019-09-26\n",
      "Name: B, dtype: datetime64[s]\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "code",
   "source": [
    "#增加列数据，列名是自定义的\n",
    "df_obj2['G'] = df_obj2['D'] + 4\n",
    "print(df_obj2.head(n=3))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:14:48.296515Z",
     "start_time": "2025-02-26T03:14:48.292434Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F  G\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao  5\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao  6\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao  7\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "source": [
    "# 删除列\n",
    "del(df_obj2['G'])\n",
    "print(df_obj2.head())\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:15:41.349768Z",
     "start_time": "2025-02-26T03:15:41.345521Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao\n",
      "3  1 2019-09-26  1.0  4       C  wangdao\n"
     ]
    }
   ],
   "execution_count": 21
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
